# R script for the Homework 4 of 19-704
# Replication and extension of the results from Gerfin (1996)

# setwd("C:/Users/Alexandra/Documents/R")

library(AER)
data("SwissLabor")

# Having a first look at the data
labor <- data.frame(SwissLabor)
summary(labor)
head(labor)

# Creation of the variables
labor <- transform(labor, AGE = age)
labor <- transform(labor, AGESQ = (age*age))
labor <- transform(labor, EDUC = education)
labor <- transform(labor, NYC = youngkids)
labor <- transform(labor, NOC = oldkids)
labor <- transform(labor, NLINC = income)
labor <- transform(labor, FOREIGN = as.numeric(foreign) - 1)
labor <- transform(labor, PAR = as.numeric(participation) - 1)
head(labor)

# Replication -----------------------------------------------------------------------------------------

# Question 1 ----------------------------------------------------
laborreg <- glm(PAR ~ AGE + AGESQ + EDUC + NYC + NOC + NLINC + FOREIGN, 
               data = labor, family = binomial(link = "probit"))
summary(laborreg)

# Extension -------------------------------------------------------------------------------------------

# Question 1 ----------------------------------------------------

# Histograms
png(filename = "HW4_Cosseron_histogram1_Q1.png",
    width = 3000,
    height = 3000,
    res = 300)
par(cex = 1.3)
hist(labor$AGE,
     prob = FALSE,
     breaks = "fd",
     xlim = c(2, 7),
     ylim = c(0, 200),
     xlab = "Age in years divided by 10",
     main = "Histogram of age")
rug(labor$AGE, col = rgb(1, 0, 0, .3))
dev.off()

png(filename = "HW4_Cosseron_histogram2_Q1.png",
    width = 3000,
    height = 3000,
    res = 300)
par(cex = 1.3)
hist(labor$EDUC,
     prob = FALSE,
     breaks = "fd",
     xlim = c(0, 22),
     ylim = c(0, 200),
     xlab = "Years of formal education",
     main = "Histogram of education")
rug(labor$EDUC, col = rgb(1, 0, 0, .3))
dev.off()

png(filename = "HW4_Cosseron_histogram3_Q1.png",
    width = 3000,
    height = 3000,
    res = 300)
par(cex = 1.3)
hist(labor$NYC,
     breaks = "fd",
     xlim = c(0, 3),
     ylim = c(0, 200),
     xlab = "Number of young children",
     main = "Histogram of the number of young children")
rug(labor$NYC, col = rgb(1, 0, 0, .3))
dev.off()

png(filename = "HW4_Cosseron_histogram4_Q1.png",
    width = 3000,
    height = 3000,
    res = 300)
par(cex = 1.3)
hist(labor$NLINC,
     breaks = "fd",
     xlim = c(7, 13),
     ylim = c(0, 150),
     xlab = "Log of yearly non-labour income",
     main = "Histogram of the log of yearly non-labour income")
rug(labor$NLINC, col = rgb(1, 0, 0, .3))
dev.off()

png(filename = "HW4_Cosseron_histogram5_Q1.png",
    width = 3000,
    height = 3000,
    res = 300)
par(cex = 1.3)
hist(labor$NOC,
     breaks = "fd",
     xlim = c(0, 6),
     ylim = c(0, 400),
     xlab = "Number of older children",
     main = "Histogram of the number of older children")
rug(labor$NOC, col = rgb(1, 0, 0, .3))
dev.off()

# Tables
install.packages("vcd", repos = "http://lib.stat.cmu.edu/R/CRAN/")
library(vcd)

structable(~ participation, labor)
structable(~ foreign, labor)

# Question 3 -----------------------------------

laborreg3 <- glm(PAR ~ AGE + AGESQ + EDUC + NYC + NOC + NLINC + FOREIGN, 
                data = labor, family = binomial(link = "logit"))
summary(laborreg3)
